Let's review the MLB draft season.
Start by loading the packages.
suppressMessages({
library(tidyverse) #ggplot2 dplyr tibble tidyr purrr forecats
library(ggrepel) #automatically position non-overlapping text labels
library(glue) #interpreted literal strings
library(gt)
library(gtExtras)
library(paletteer)
library(mlbplotR)
})
Next, we load the team logos.
# Load the MLB team table and drop the league-level rows ("AL", "NL",
# "MLB") so that only individual clubs remain.
teams_colors_logos <- mlbplotR::load_mlb_teams() %>%
  filter(!team_abbr %in% c("AL", "NL", "MLB")) %>%
  mutate(
    # Positions in a 6 x 5 grid: `a` cycles 1..6, `b` counts down from 5 to 1
    # in runs of six. Both vectors have length 30, so the filtered table must
    # contain exactly 30 rows.
    a = rep(1:6, times = 5),
    b = rep(5:1, each = 6),
    # Abbreviations containing "A" get alpha 1, all others 0.75.
    alpha = ifelse(grepl("A", team_abbr), 1, 0.75),
    # Abbreviations containing "E" are tagged "b/w"; the rest stay NA.
    color = ifelse(grepl("E", team_abbr), "b/w", NA)
  )
Now let's load our draft data and clean it up.
# Read the raw draft export.
exposure <- read.csv("./data/exposure_mar20.csv")

# Clean the export and enrich every pick with a team logo, player id,
# headshot, and the ADP / projection data from the rankings file.
exposure <- exposure %>%
  mutate(
    # Parse the timestamp as UTC and keep only the calendar date.
    Picked.At = as.Date(
      as.POSIXct(Picked.At, format = "%Y-%m-%d %H:%M:%S", tz = "UTC")
    ),
    name = paste(First.Name, Last.Name)
  ) %>%
  select(name, Team, Position, Picked.At, Pick.Number, Draft) %>%
  left_join(
    teams_colors_logos %>% select(team_abbr, team_logo_espn),
    by = c("Team" = "team_abbr")
  ) %>%
  left_join(read.csv("./data/playerids.csv"), by = c("name" = "Name")) %>%
  mutate(
    # Helper key (player name + draft id) used only for de-duplication below.
    copy = paste0(name, Draft),
    playerid = as.double(playerid)
  ) %>%
  # Keep the first row for each player/draft key.
  distinct(copy, .keep_all = TRUE) %>%
  left_join(
    mlbplotR::load_headshots() %>%
      select(fangraphs_id, espn_headshot) %>%
      drop_na(fangraphs_id),
    by = c("playerid" = "fangraphs_id")
  ) %>%
  select(-copy) %>%
  left_join(
    read.csv("./projections_season/rankings_mar20.csv") %>%
      mutate(
        name = paste(firstName, lastName),
        adp = as.numeric(adp)
      ) %>%
      select(name, adp, projectedPoints, positionRank),
    by = "name"
  ) %>%
  mutate(
    # Positive value: the player was taken later than his ADP.
    value = Pick.Number - adp,
    rel_value = round(value / adp, digits = 2),
    # Strip everything but capital letters from the position rank to get
    # the position group.
    positionGroup = gsub("[^A-Z]", "", positionRank)
  ) %>%
  # Picks without an ADP cannot be valued, so they are dropped.
  drop_na(adp) %>%
  arrange(Pick.Number)
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
Highest owned players
# Count how many times each player was drafted, convert that to a share of
# all distinct drafts, and show the ten most frequently drafted players.
exposure %>%
  group_by(name, espn_headshot) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  ungroup() %>%
  mutate(own = round(count / n_distinct(exposure$Draft), digits = 2)) %>%
  slice_head(n = 10) %>%
  gt() %>%
  gt_img_rows(columns = espn_headshot, height = 50) %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'name'. You can override using the
## `.groups` argument.
| name | espn_headshot | count | own |
|---|---|---|---|
| Mark Canha | 16 | 0.28 | |
| J.D. Martinez | 15 | 0.26 | |
| Jake Cronenworth | 13 | 0.23 | |
| Aaron Nola | 12 | 0.21 | |
| Brandon Woodruff | 12 | 0.21 | |
| Logan Webb | 12 | 0.21 | |
| Lourdes Gurriel Jr. | 12 | 0.21 | |
| Xander Bogaerts | 12 | 0.21 | |
| Alek Manoah | 11 | 0.19 | |
| Andrew McCutchen | 11 | 0.19 |
Let's look at the drafts grouped by date.
# Aggregate picks by calendar date: number of picks, summed value and
# relative value, and the per-pick averages of both.
drafts_by_date <- exposure %>%
  group_by(Picked.At) %>%
  summarize(
    total_picks = n(),
    total_value = sum(value, na.rm = TRUE),
    total_rel_value = sum(rel_value, na.rm = TRUE)
  ) %>%
  mutate(
    value_per_pick = round(total_value / total_picks, digits = 2),
    rel_value_per_pick = round(total_rel_value / total_picks, digits = 2)
  )

gt(drafts_by_date)
| Picked.At | total_picks | total_value | total_rel_value | value_per_pick | rel_value_per_pick |
|---|---|---|---|---|---|
| 2023-02-19 | 20 | -21.0 | 0.25 | -1.05 | 0.01 |
| 2023-02-21 | 172 | -399.5 | -0.65 | -2.32 | 0.00 |
| 2023-02-22 | 111 | 232.2 | 7.02 | 2.09 | 0.06 |
| 2023-02-23 | 177 | -26.8 | 0.50 | -0.15 | 0.00 |
| 2023-02-25 | 120 | 118.6 | 3.09 | 0.99 | 0.03 |
| 2023-02-26 | 20 | -29.3 | 0.04 | -1.46 | 0.00 |
| 2023-02-27 | 39 | -99.8 | 0.69 | -2.56 | 0.02 |
| 2023-02-28 | 60 | -8.4 | 0.42 | -0.14 | 0.01 |
| 2023-03-01 | 20 | 35.1 | 0.78 | 1.76 | 0.04 |
| 2023-03-02 | 20 | -13.2 | 0.10 | -0.66 | 0.00 |
| 2023-03-04 | 40 | 29.2 | 0.56 | 0.73 | 0.01 |
| 2023-03-05 | 20 | 107.0 | 1.36 | 5.35 | 0.07 |
| 2023-03-06 | 20 | 32.6 | -0.33 | 1.63 | -0.02 |
| 2023-03-08 | 80 | -27.1 | 0.22 | -0.34 | 0.00 |
| 2023-03-09 | 20 | -42.3 | 0.92 | -2.11 | 0.05 |
| 2023-03-11 | 7 | 21.0 | 0.50 | 3.00 | 0.07 |
| 2023-03-12 | 57 | 67.8 | 0.56 | 1.19 | 0.01 |
| 2023-03-13 | 16 | -84.2 | -0.50 | -5.26 | -0.03 |
| 2023-03-19 | 80 | 239.2 | 2.20 | 2.99 | 0.03 |
| 2023-03-20 | 40 | -32.9 | -0.07 | -0.82 | 0.00 |
Top 10 picks from all drafts in terms of relative value
# Ten best individual picks across all drafts, ranked by relative value
# (rel_value), shown with team logo and player headshot.
exposure %>%
  select(
    name, team_logo_espn, espn_headshot,
    Pick.Number, adp, value, rel_value, Picked.At
  ) %>%
  arrange(desc(rel_value)) %>%
  slice_head(n = 10) %>%
  gt() %>%
  gt_img_rows(columns = "team_logo_espn", height = 50) %>%
  gt_img_rows(columns = "espn_headshot", height = 50)
| name | team_logo_espn | espn_headshot | Pick.Number | adp | value | rel_value | Picked.At |
|---|---|---|---|---|---|---|---|
| Aaron Judge | 4 | 1.2 | 2.8 | 2.33 | 2023-02-22 | ||
| Ronald Acuña Jr. | 6 | 2.9 | 3.1 | 1.07 | 2023-03-09 | ||
| Aaron Judge | 2 | 1.2 | 0.8 | 0.67 | 2023-02-21 | ||
| Juan Soto | 4 | 2.5 | 1.5 | 0.60 | 2023-02-27 | ||
| Julio Rodríguez | 8 | 5.4 | 2.6 | 0.48 | 2023-02-22 | ||
| Jarred Kelenic | 225 | 153.5 | 71.5 | 0.47 | 2023-02-23 | ||
| Shohei Ohtani | 7 | 4.8 | 2.2 | 0.46 | 2023-02-22 | ||
| Jarred Kelenic | 221 | 153.5 | 67.5 | 0.44 | 2023-03-04 | ||
| Adolis García | 58 | 40.6 | 17.4 | 0.43 | 2023-03-05 | ||
| Fernando Tatis Jr. | 28 | 19.9 | 8.1 | 0.41 | 2023-02-22 |
Picks grouped by MLB team
# Number of picks per MLB team; the ten most-drafted teams are shown with
# their logo (the logo column is renamed to `team` for display).
exposure %>%
  group_by(Team, team_logo_espn) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  ungroup() %>%
  rename(team = team_logo_espn) %>%
  slice_head(n = 10) %>%
  gt() %>%
  gt_img_rows(columns = team) %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'Team'. You can override using the
## `.groups` argument.
| Team | team | count |
|---|---|---|
| NYM | 80 | |
| SD | 78 | |
| STL | 76 | |
| ATL | 66 | |
| LAD | 66 | |
| LAA | 56 | |
| NYY | 49 | |
| TB | 49 | |
| HOU | 45 | |
| MIL | 45 |
Picks grouped by position
# Number of picks per position, most frequent first, plus each position's
# share of all picks.
exposure %>%
  count(Position, name = "count", sort = TRUE) %>%
  mutate(own = round(count / sum(count), digits = 2)) %>%
  gt()
| Position | count | own |
|---|---|---|
| SP | 338 | 0.30 |
| RF | 131 | 0.12 |
| LF | 130 | 0.11 |
| 3B | 129 | 0.11 |
| 1B | 92 | 0.08 |
| CF | 86 | 0.08 |
| SS | 86 | 0.08 |
| 2B | 69 | 0.06 |
| C | 45 | 0.04 |
| DH | 33 | 0.03 |
Number of drafts in which each team was drafted at least once
# Collapse the picks to one row per draft/team pair, then count those rows
# per team: the number of drafts in which each team has at least one pick.
# No position filter is applied, so pitchers are included.
exposure %>%
  group_by(Draft, Team) %>%
  summarise(count = n()) %>%
  group_by(Team) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  gt() %>%
  gt_theme_dark()
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
| Team | count |
|---|---|
| NYM | 36 |
| SD | 35 |
| STL | 34 |
| ATL | 33 |
| LAD | 32 |
| LAA | 29 |
| SF | 29 |
| TB | 29 |
| MIL | 28 |
| TOR | 27 |
| HOU | 26 |
| NYY | 26 |
| CLE | 25 |
| MIN | 25 |
| PHI | 25 |
| BOS | 23 |
| SEA | 23 |
| BAL | 21 |
| KC | 21 |
| ARI | 19 |
| MIA | 19 |
| CWS | 17 |
| TEX | 17 |
| PIT | 15 |
| CHC | 12 |
| COL | 11 |
| DET | 8 |
| CIN | 5 |
| WSH | 2 |
| OAK | 1 |
Create the objects to merge into the drafts data frame.
# Order in which the position groups are written into the configuration.
ord <- c("P", "IF", "OF")

# Roster configuration per draft: the pick counts per position group are
# pasted together in `ord` order and stored as a number (e.g. 677 means
# 6 P, 7 IF, 7 OF).
# NOTE(review): the code is only unambiguous while every count is a single
# digit and positionGroup takes no values outside `ord` - confirm.
exposure_config <- exposure %>%
  group_by(Draft, positionGroup) %>%
  summarise(count = n()) %>%
  arrange(Draft, factor(positionGroup, levels = ord)) %>%
  group_by(Draft) %>%
  summarise(config = as.numeric(paste(count, collapse = "")))
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# How many drafts used each roster configuration, most common first.
exposure_config %>%
  count(config, name = "count", sort = TRUE)
## # A tibble: 8 × 2
## config count
## <dbl> <int>
## 1 677 20
## 2 686 13
## 3 587 11
## 4 776 6
## 5 578 3
## 6 767 2
## 7 586 1
## 8 965 1
# Stacked batters per draft: count non-pitchers per draft/team, keep only
# teams with more than one, and add those counts up per draft.
exposure_batters <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  summarise(batters = n()) %>%
  ungroup() %>%
  filter(batters > 1) %>%
  group_by(Draft) %>%
  summarise(batters = sum(batters)) %>%
  arrange(desc(batters))
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Largest single-team group of non-pitchers in each draft.
exposure_big_stack <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  summarise(batters = n()) %>%
  group_by(Draft) %>%
  summarise(big_stack = max(batters))
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Number of teams per draft that contribute more than one non-pitcher.
exposure_num_teams <- exposure %>%
  filter(positionGroup != "P") %>%
  group_by(Draft, Team) %>%
  summarise(batters = n()) %>%
  ungroup() %>%
  filter(batters > 1) %>%
  count(Draft, name = "teams_stacked")
## `summarise()` has grouped output by 'Draft'. You can override using the
## `.groups` argument.
# Player taken with an overall pick number below 13 in each draft.
# NOTE(review): presumably this is the first round of a 12-team draft -
# confirm the league size.
first_pick <- exposure %>%
  filter(Pick.Number < 13) %>%
  select(first_pick = name, Draft)
# One row per draft: pick totals, value summaries, and the stack metrics
# computed above, plus a descriptive `file` label built from them.
drafts <- exposure %>%
  group_by(Draft) %>%
  summarize(
    total_picks = n(),
    total_value = sum(value),
    total_rel_value = sum(rel_value),
    # `exposure` is sorted by Pick.Number, so this is the date of the
    # draft's final pick.
    Picked.At = last(Picked.At)
  ) %>%
  mutate(
    value_per_pick = round(total_value / total_picks, digits = 2),
    rel_value_per_pick = round(total_rel_value / total_picks, digits = 2)
  ) %>%
  arrange(desc(rel_value_per_pick)) %>%
  left_join(exposure_config, by = "Draft") %>%
  left_join(exposure_batters, by = "Draft") %>%
  left_join(exposure_big_stack, by = "Draft") %>%
  left_join(exposure_num_teams, by = "Draft") %>%
  left_join(first_pick, by = "Draft") %>%
  mutate(
    # Drafts without any stack have no row in exposure_batters or
    # exposure_num_teams, so the joins leave NA. Treat that as zero so the
    # label below does not contain the literal text "NA".
    batters = coalesce(batters, 0L),
    teams_stacked = coalesce(teams_stacked, 0L),
    file = paste(config, teams_stacked, batters, big_stack, first_pick)
  )
# Split the pick-level data into one data frame per draft.
exp_list <- split(exposure, exposure$Draft)

# Lookup table from draft identifier to the descriptive label in `drafts`.
name_mapping <- data.frame(
  old_names = drafts$Draft,
  new_names = drafts$file,
  stringsAsFactors = FALSE
)

# Position of each list name in the lookup table (NA when not found).
name_indices <- match(names(exp_list), name_mapping$old_names)

# Rename only the list elements that have a matching label.
has_match <- !is.na(name_indices)
names(exp_list)[has_match] <- name_mapping$new_names[name_indices[has_match]]